* CGK_Expenditures.do: Cleans the expenditure data as follows
*  - sums expenditures that occur in the same month as recommended by the BLS
*  - drops 4th and higher observations per interview
*  - drops households with zero food expenditures in any interview
*  - corrects panel expenditure variables with sample breaks
*
* Coibion-Gorodnichenko-Kueng; Initial version: March 2012; this version: May 2017

* Root directory of the CEX build files; every path used in the visible part of this file is built from $data.
global data "C:\Users\nfumi\Desktop\HANKempirical\InnocentBystandersreplication_folder\source_files\build files for CEX"
* Alias of $data.
global home "$data"
* Command used to write every dataset below (invoked as $savetype "<file>", replace).
global savetype "saveold"





			**************
			* Clean data * Note: Overall, 7% of the data is dropped in this step.
			**************

* Load the monthly expenditure file and record the starting sample size.
use "$data/stata/MTABnew.dta", clear
unique NEWIDunique // baseline # of households and # of observations

* Expenditures on elderly care cannot be negative; remove those rows. Note: Only about 40 observations are affected.
drop if elderly<0
unique NEWIDunique

* Rows with zero total expenditures are empty entries; nothing is lost by deleting them.
drop if totalexp==0
unique NEWIDunique

* Sum expenditures that occur in the same month but are reported in different interviews, as recommended by the BLS;
* see e.g. Souleles (JPubE 2002, p.119). Note: Less than 1% of all observations are affected.
duplicates report NEWIDunique date // before: are there repeated (NEWIDunique, date) pairs?
collapse (sum) alcaway-waterT (mean) NEWID REF_YR REF_MO yq QINTRVYR intdateQ, by(NEWIDunique date)

* yq and intdateQ were averaged by the collapse; rebuild both from the year and quarter of the (averaged) interview quarter.
drop yq
tempvar intday intyear intqtr
gen `intday'  = dofq(intdateQ)           // first day of the interview quarter
gen `intyear' = yofd(`intday')
gen `intqtr'  = quarter(`intday')
gen yq = (`intyear'-1900)*10 + `intqtr'  // years since 1900, then the quarter as last digit
drop intdateQ
gen intdateQ = yq(`intyear', `intqtr')
format intdateQ %tq
label var intdateQ "interview quarter"
drop `intday' `intyear' `intqtr'

* QINTRVYR was averaged as well; keep its integer part.
replace QINTRVYR = floor(QINTRVYR)

order NEWID* REF_YR REF_MO date yq QINTRVYR intdateQ
duplicates report NEWIDunique date // after: one row per (NEWIDunique, date)
unique NEWIDunique

*save "$data/stata/temp_MTABnew.dta"


/* Checked: there is no household that reports more than 4 monthly expenditures per interview, so the step below is disabled.
* Drop households that report more than 4 monthly expenditures per interview, since it is difficult to allocate the additional monthly expenditures. Note: This is recommended by the BLS; less than 1% of all observations are affected.
preserve
sort NEWIDunique date
bysort NEWIDunique: egen obsseq=seq()
tab obsseq
keep NEWIDunique obsseq
replace obsseq=(obsseq==4)
collapse (sum) obsseq, by(NEWIDunique)
tab obsseq
drop if obsseq>0
drop obsseq
sort NEWIDunique
$savetype "$data/tempfiles/temp_clean.dta", replace
restore
merge m:1 NEWIDunique using "$data/tempfiles/temp_clean.dta"
keep if _merge==3
drop _merge
unique NEWIDunique
*/

* Drop interviews with less than 3 months of data. Note: Only about 30 of all observations are affected.
*
* intno (the interview number) is the last digit of NEWIDunique. It is taken with mod() rather than via
* an intermediate floor(NEWIDunique/10) variable: gen stores new variables as float by default, and a float
* cannot hold large identifiers exactly, which would silently corrupt the extracted digit.
gen byte intno = mod(NEWIDunique, 10)

* Count the monthly rows of each interview in memory (one row per NEWIDunique-date at this point),
* so no temporary file has to be written, merged back and removed.
tempvar nmonths firstrow
bysort NEWIDunique: gen long `nmonths' = _N
label var `nmonths' "monthly observations per interview"
egen byte `firstrow' = tag(NEWIDunique)
tab `nmonths' if `firstrow' // distribution over interviews (one row per NEWIDunique)
drop if `nmonths'<3
drop `nmonths' `firstrow'
unique NEWIDunique




* Drop every NEWIDunique whose smallest monthly food expenditure (foodhome + foodaway) is zero.
* Note: Less than 1% of all observations are affected.
preserve
drop if date==. // rows without a date do not enter the minimum
gen food = foodhome + foodaway
collapse (min) food, by(NEWIDunique)
count // # of NEWIDunique BEFORE the zero-food ones are removed
keep if food!=0 // a missing minimum is kept, exactly as with "drop if food==0"
keep NEWIDunique
sort NEWIDunique
$savetype "$data/tempfiles/temp_clean.dta", replace
restore

* Keep only the rows whose NEWIDunique is on the list just written.
merge m:1 NEWIDunique using "$data/tempfiles/temp_clean.dta"
drop if _merge!=3
drop _merge
cap n rm "$data/tempfiles/temp_clean.dta"
unique NEWIDunique

* Save the cleaned panel.
compress
sort NEWIDunique intno REF_YR REF_MO
$savetype "$data/CGK_Expendituresnew.dta", replace




			***********************************************
			* Correct for breaks in expenditure variables *
			***********************************************

* Reload the cleaned panel written at the end of the previous section.
use "$data/CGK_Expendituresnew.dta", clear

* Attach the interview month (intdate) from the family file; it is the variable the sample-break corrections below condition on.
merge m:1 NEWIDunique using "$data/stata/FMLYnew", keepusing(intdate)
drop if _merge!=3 // Note: 7% non-matched from using data is due to sample selection above
drop _merge
order NEWID* intno date REF_YR REF_MO intdate intdateQ yq QINTRVYR

/* There are no such breaks in the new data, so the corrections below are disabled.
* foodaway   (Break in 2007Q2: The questionnaire changes in 2007 to ask about usual WEEKLY expenses. In previous
*                              years it asked for usual MONTHLY expenses. )

global var = "foodaway"
gen dummy  = ( intdate< m(2007m4)  )
gen dummy2 = ( intdate>=m(2007m4) & intdate<m(2007m7) )
reg ${var} dummy /*dummy2*/ date if ${var}!=0 & (intdate<m(2007m4) | intdate>m(2007m6))
gen ${var}_original=${var}
replace ${var}=${var}_original-dummy*_b[dummy] /*-dummy2*_b[dummy2]*/ if ${var}!=0 
drop dummy*


* foodhome   (Break in 2008Q2: Probably similar issue as in foodaway, although break is one year later. 
*                              Craig Kreisler from the BLS is looking into this.
*             Break in 1982Q1-1988Q1: Change in questionnaire. See e.g. online appendix to Nalewaik (JME 2006).)

global var = "foodhome"
gen dummy  = ( intdate>=m(1982m1) & intdate<m(1988m1) )
reg ${var} dummy date if ${var}!=0 
gen ${var}_original=${var}
replace ${var}=${var}_original-dummy *_b[dummy] if ${var}!=0 
drop dummy*


* perscareS  (Break in 2001Q2: The question changed slightly.)

global var = "perscareS"
gen dummy  = ( intdate>=m(2001m4) & intdate<m(2004m1) )
gen dummy2 = ( intdate>=m(2004m1) & intdate<m(2004m4) )
gen dummy3 = ( intdate>=m(2004m4) )
reg ${var} dummy dummy2 dummy3 date if ${var}!=0 
gen ${var}_original=${var}
replace ${var}=${var}_original-dummy *_b[dummy]  ///
                              -dummy2*_b[dummy2] ///
                              -dummy3*_b[dummy3] if ${var}!=0 
drop dummy*


* occupexp   (Break in 2001Q2: The question changed slightly.)

global var = "occupexp"
gen dummy  = ( intdate>=m(2001m4) )
reg ${var} dummy date if ${var}!=0 
gen ${var}_original=${var}
replace ${var} = ${var}_original-dummy*_b[dummy] if ${var}!=0 
drop dummy*


* proptax    (Break in 1991Q1)
*
*  Note: Craig Kreisler from the BLS looked into this and found no obvious way to correct for the break in 1991. 
*        He writes:
*         "1991 was a year in which the Interview Survey had some major questionnaire changes. The section that  
*          had to deal with property taxes was most likely changed and in turn the outcome of the variable is not  
*          the same as before. I cannot find much documentation on what the changes were, but I'm sure the break  
*          was caused by these questionnaire changes."

global var = "proptax"
gen ${var}_original=${var}
replace ${var}=0 if intdate<m(1991m4) // Set proptax=0 before 1991m4 because the two variables are not comparable across the break.
*/

* Shrink storage types, order the panel by interview and reference month, and overwrite the file
* that was loaded at the top of this section (it now also carries intdate).
compress
sort NEWIDunique intno REF_YR REF_MO
$savetype "$data/CGK_Expendituresnew.dta", replace




			***********************************************
			* Generate nondurables, services and durables *
			***********************************************

use "$data/CGK_Expendituresnew.dta", clear

* Classifications are based on NIPA, Chapter 5: 'Consumer Spending'

* Nondurable goods
gen nondurables = foodhome + foodaway + alchome + alcaway /// food and beverages
                + clothS + clothD                          /// clothing and footwear
                + gasoline                                 /// gasoline and other energy/fuel
                + perscareS + perscareD                    /// personal care
                + reading                                  /// magazines, newspapers, etc.
                + tobacco

* Durable goods (excluded: educational books, educationD)
gen durables = entertainD /// recreational goods
             + furniture  /// furniture and furnishing
             + jewelry    // jewelry and watches

* Services (excluded: housing and rent or imputed rental value; health care; (health) insurance; education services)
gen services = babysit + elderly /// child-care, hospital, and nursery services (i.e. household operations)
             + energy + water    /// household utilities and energy
             + entertainS        /// recreation services
             + feenchrg          /// financial services
             + houseaway         /// accommodations
             + phone             /// telecommunication services
             + pubtrans          // transportation services

* Non-consumption expenditures
gen noncons = mortgageint + healthexpD + healthexpS + healthins + education + cashcont + pensions /// mortgage payments, education, health care, pensions, and cash contributions (Parker AER 1999)
            + houseexpD + houseexpS + household + rentalexpD + rentalexpS /// household expenditures (Hsieh AER 2006). This is mostly home insurance, maintenance and hence investment. NOT SURE ABOUT THIS
            + lifeinsur + rentpaid ///
            + mealaspay + rentaspay ///
            + occupexp /// occupation expenses (NOT SURE ABOUT THIS)
            + proptax /// property taxes
            + vehpurch + vehexpD + vehexpS // new motor vehicles, parts and accessories

* Nondurable goods plus services
gen nondur = nondurables + services

* Strict nondurables; see Lusardi (JBES 1996), appendix A.2
gen nondur_strict = foodhome + foodaway + alchome + alcaway /// food and alcoholic beverages
                  + tobacco                                  /// tobacco
                  + energy + water                           /// household utilities and energy (i.e. utilities)
                  + perscareS                                /// personal care services
                  + babysit + elderly                        /// child-care, hospital, and nursery services (i.e. household operations)
                  + pubtrans                                 /// public transportation
                  + gasoline                                 // gasoline and motor oil
 
* Total food expenditure (at home + away).
gen food    	  = foodhome+foodaway
*gen food_original = foodhome_original+foodaway_original

* Total consumption = nondurables + services + durables.
* nondur is defined above as nondurables + services, so services must NOT be added a second time here
* (the previous formula nondur+services+durable counted services twice, which also inflated totalexp2).
* The full variable name durables is used instead of the abbreviation durable.
gen cons    = nondur+durables

* Total expenditure = consumption + non-consumption expenditures.
gen totalexp2 = cons+noncons

* totalexp3: nondurables + services + durables + non-consumption, where non-consumption excludes
* (i) health care expenditures (healthexpD healthexpS), (ii) savings (pensions cashcont lifeinsur),
* (iii) transfers and taxes (mealaspay rentaspay proptax).
gen totalexp3 = foodhome + foodaway + alchome + alcaway + clothS + clothD + gasoline + perscareS + perscareD + reading + tobacco /// nondurables
              + babysit + elderly + energy + water + entertainS + feenchrg + houseaway + phone + pubtrans /// services
              + entertainD + furniture + jewelry /// durables
              + mortgageint + rentpaid + healthins + education + houseexpD + houseexpS + household + rentalexpD + rentalexpS + occupexp + vehpurch + vehexpD + vehexpS // non-consumption (restricted)


** Generate topcoding values: the same aggregates as above, built from the *T companion of each component

* Nondurable goods
gen nondurablesT = foodhomeT + foodawayT + alchomeT + alcawayT /// food and beverages
                 + clothST + clothDT                            /// clothing and footwear
                 + gasolineT                                    /// gasoline and other energy/fuel
                 + perscareST + perscareDT                      /// personal care
                 + readingT                                     /// magazines, newspapers, etc.
                 + tobaccoT

* Durable goods (excluded: educational books, educationD)
gen durablesT = entertainDT /// recreational goods
              + furnitureT  /// furniture and furnishing
              + jewelryT    // jewelry and watches

* Services (excluded: housing and rent or imputed rental value; health care; (health) insurance; education services)
gen servicesT = babysitT + elderlyT /// child-care, hospital, and nursery services (i.e. household operations)
              + energyT + waterT    /// household utilities and energy
              + entertainST         /// recreation services
              + feenchrgT           /// financial services
              + houseawayT          /// accommodations
              + phoneT              /// telecommunication services
              + pubtransT           // transportation services

* Non-consumption expenditures
gen nonconsT = mortgageintT + healthexpDT + healthexpST + healthinsT + educationT + cashcontT + pensionsT /// mortgage payments, education, health care, pensions, and cash contributions (Parker AER 1999)
             + houseexpDT + houseexpST + householdT + rentalexpDT + rentalexpST /// household expenditures (Hsieh AER 2006). This is mostly home insurance, maintenance and hence investment. NOT SURE ABOUT THIS
             + lifeinsurT + rentpaidT ///
             + mealaspayT + rentaspayT ///
             + occupexpT /// occupation expenses (NOT SURE ABOUT THIS)
             + proptaxT /// property taxes
             + vehpurchT + vehexpDT + vehexpST // new motor vehicles, parts and accessories

* Nondurable goods plus services
gen nondurT = nondurablesT + servicesT

* Strict nondurables; see Lusardi (JBES 1996), appendix A.2
gen nondur_strictT = foodhomeT + foodawayT + alchomeT + alcawayT /// food and alcoholic beverages
                   + tobaccoT                                     /// tobacco
                   + energyT + waterT                             /// household utilities and energy (i.e. utilities)
                   + perscareST                                   /// personal care services
                   + babysitT + elderlyT                          /// child-care, hospital, and nursery services (i.e. household operations)
                   + pubtransT                                    /// public transportation
                   + gasolineT                                    // gasoline and motor oil
 
* Top-coding value for total consumption.
* nondurT is defined above as nondurablesT + servicesT, so servicesT must not be added again
* (the previous formula nondurT+servicesT+durablesT counted servicesT twice).
gen consT    = nondurT+durablesT

* Top-coding value for total expenditure (consumption + non-consumption).
gen totalexp2T = consT+nonconsT



** Drop top-coding indicators for expenditure categories without top-coded values **
* Each component indicator is summarized and removed when its mean is exactly zero.
*sum *T
foreach tcvar of varlist ///
    alcawayT alchomeT babysitT cashcontT clothDT clothST educationT elderlyT energyT ///
    entertainDT entertainST feenchrgT foodawayT foodhomeT furnitureT gasolineT ///
    healthexpDT healthexpST healthinsT houseawayT houseexpDT houseexpST householdT ///
    jewelryT lifeinsurT mealaspayT mortgageintT occupexpT pensionsT perscareDT ///
    perscareST phoneT proptaxT pubtransT readingT rentalexpDT rentalexpST rentaspayT ///
    rentpaidT tobaccoT totalexpT vehexpDT vehexpST vehpurchT waterT {
    sum `tcvar'
    if `r(mean)'==0 {
        drop `tcvar'
    }
}


* Add sample weights: bring FINLWT21 and RESPSTAT in from the family file and keep matched rows only.
merge m:1 NEWIDunique using "$data/stata/FMLYnew.dta", keepusing(FINLWT21 RESPSTAT)
drop if _merge!=3
drop _merge
* Weight rounded to the nearest integer.
gen fwt = round(FINLWT21)


* Deflate nominal variables to real dollars 1982-84 (and drop top-coded expenditures?)

* Month used to look up the price index: the expenditure month, or the interview month when date is missing.
gen month = cond(date==., intdate, date)
format month %tm
order month NEWIDunique
sort  month NEWIDunique
merge m:1 month using "$data/stata/CPI_unew.dta"
drop if _merge!=3
drop _merge

* Alphabetize the variables, then pull identifiers to the front and push the *T variables to the end.
* The final keep below relies on this ordering for the range alcaway-water.
aorder
order NEWIDunique NEWID intno month date intdate fwt REF_YR REF_MO yq QINTRVYR intdateQ FINLWT21 RESPSTAT
order *T, last
order cpi_u

* Variables converted to real dollars.
* NOTE(review): cashcont and pensions are not in this list, so they appear to stay in nominal dollars in the
* saved file even though noncons (which contains them) is deflated -- confirm that this is intended.
local realvars ///
  nondurables services nondur nondur_strict food durables cons totalexp totalexp2 totalexp3 noncons /// aggregates
  foodhome foodaway alchome alcaway mortgageint proptax tobacco                     /// nondurables
  houseexpS houseaway rentpaid rentalexpS energy phone water babysit elderly        /// services
  household clothS gasoline vehexpS pubtrans healthins healthexpS entertainS        ///
  perscareS reading education occupexp feenchrg lifeinsur                           ///
  houseexpD rentalexpD furniture clothD jewelry vehpurch vehexpD healthexpD         /// durables
  entertainD perscareD                                                              ///
  mealaspay rentaspay                                                               // income listed in expenditure files
foreach v of local realvars {
    * (disabled) drop if `v'T!=0 -- would drop top-coded expenditures
    replace `v' = `v'/cpi_u*100
}
drop cpi_u


* Save data of interest for Coibion-Gorodnichenko-Kueng
drop *T // drop top-coding indicators
keep NEWIDunique intno date intdate fwt alcaway-water
sort NEWIDunique intno date
label data "monthly expenditure data at monthly frequency for Coibion-Gorodnichenko-Kueng"
$savetype "$data/CGK_Expendituresnew.dta", replace




			******************************************
			* Impose same selection on income sample *
			******************************************
/*
* Income data
use "$data/CGK_Incomenew.dta", clear
merge 1:m NEWIDunique using "$data/CGK_Expendituresnew.dta", keepusing(NEWIDunique)
keep if _merge==3
drop _merge

sort intdate NEWIDunique
compress
label data "Income data (raw before imputation) for Coibion-Gorodnichenko-Kueng"
$savetype "$data/CGK_Incomenew.dta", replace
unique NEWIDunique

* Expenditure data
use "$data/CGK_Expendituresnew.dta", clear
merge m:m NEWIDunique using "$data/CGK_Incomenew.dta", keepusing(NEWIDunique) 
keep if _merge==3
drop _merge

label data "monthly expenditure data at monthly frequency for Coibion-Gorodnichenko-Kueng"
$savetype "$data/CGK_Expendituresnew.dta", replace
unique NEWIDunique
*/
